Using mutate can sometimes feel like…
But soon you will be a mutate expert and everything will be…
awesome.
library("dplyr")
library("purrr")

# Approach 1: one nested ifelse(). Each condition is tested in order and the
# innermost "Unknown" is the fallback when none of them match.
mutate(grade_group = ifelse(Grade >= 5 & Grade <= 8 & Subject == "ELA", "5-8",
                     ifelse(Grade >= 5 & Grade <= 8 & Subject == "Math", "5-7",
                     ifelse(Grade < 5, "3-4", "Unknown")
                     )))

# Approach 2: build the column up in steps inside a single mutate(). Every
# step after the first falls back to the current value of grade_group, so
# rows assigned by an earlier step are left untouched. Rows that match no
# condition stay NA.
mutate(grade_group = ifelse(Grade >= 5 & Grade <= 8 & Subject == "ELA", "5-8", NA),
       grade_group = ifelse(Grade >= 5 & Grade <= 8 & Subject == "Math", "5-7", grade_group),
       # The fallback must be the column itself, not the string "grade_group";
       # a quoted fallback would overwrite every Grade >= 5 row with that text.
       grade_group = ifelse(Grade < 5, "3-4", grade_group))

# Approach 3: keep the grade -> group mapping in a small lookup table and
# join it onto the data instead of encoding it in ifelse() calls.
grade_group <- tibble(grade = 3:8,
                      ela_group = c("3-4", "3-4", rep("5-8", 4)),
                      math_group = c("3-4", "3-4", rep("5-7", 4))
)
grade_group
## # A tibble: 6 x 3
## grade ela_group math_group
## <int> <chr> <chr>
## 1 3 3-4 3-4
## 2 4 3-4 3-4
## 3 5 5-8 5-7
## 4 6 5-8 5-7
## 5 7 5-8 5-7
## 6 8 5-8 5-7
results <- results %>% left_join(grade_group, by="grade")

group_by and mutate

Example: I have a file with student profile info, but each student has one record per school year, so Ying has 9th-grade, 10th-grade, and 11th-grade records. I don't trust that the demographic info is identical in each year, so I want to create a file with one row per student, taking the latest year.
Option 1. Arrange by student and desc(year), then keep the first row for every student.
Option 2. Group by student, use mutate to add each student's latest year, then filter to the rows from that year:
# Keep only each student's most recent school-year record.
df %>%
  group_by(student_id) %>%
  # Inside a grouped mutate(), max() is evaluated per student, so max_year
  # holds that student's latest school_year on every one of their rows.
  mutate(max_year = max(school_year)) %>%
  filter(school_year == max_year) %>%
  # Drop the grouping so later verbs operate on the whole table again.
  ungroup()

# Toy data for the next example: one school, two teachers, one score each.
df <- tibble(school = c("EBCS", "EBCS"), score = c(100, 50), teacher = c("x", "y"))
# For every school, attach the top score and the teacher who achieved it.
df %>%
  group_by(school) %>%
  mutate(max_score_by_school = max(score),
         # Index the group's whole teacher vector at the position of the
         # highest score. An element-wise map2_chr(score, teacher, ...) would
         # only ever see one row at a time and hand back each row's own
         # teacher instead of the best one.
         best_teacher = teacher[which.max(score)]
  )
## Warning: package 'bindrcpp' was built under R version 3.3.3
## # A tibble: 2 x 5
## # Groups: school [1]
## school score teacher max_score_by_school best_teacher
## <chr> <dbl> <chr> <dbl> <chr>
## 1 EBCS 100 x 100 x
## 2 EBCS 50 y 100 x
# mutate_at / summarise_at: apply the same function to a chosen set of columns.
# NOTE(review): the original notes contain the truncated fragment
#   as.numeric())
# here; it is kept verbatim because its full form cannot be recovered.
# Template: mutate_at(vars(), funs())
results %>%
  mutate_at(vars(score, grade), as.numeric)

# mutate_if / summarise_if: apply a function to every column for which the
# predicate (here is.numeric) returns TRUE.
summarise_if(is.numeric, funs(mean, min, max, sd, n))
# Type conversions return one value per row, so they belong in mutate_if();
# summarise_if() expects each function to collapse a column to a single value.
mutate_if(is.numeric, as.character)
mutate_if(is.numeric, as.factor)

# Pipeline assertion: fail if any student_id appears more than once.
verify(nrow(get_dupes(., student_id)) == 0)